#====# Performs the estimation window phase #====#
# WARNING: this script estimates thousands of market models. It took about 25min to execute in full on our machine

# Setup ----
library(tictoc)
library(bizdays)
library(showtext)
library(tidyverse)
library(tidylog, warn.conflicts = FALSE)
source("aux/estimation_function.R") # this wraps the function that we use for the estimation window

# prepare business calendar for the estimation window:
business_calendar <- create.calendar('biz_calendar', weekdays = c('saturday','sunday'))

# Import data ----
stocks <- read_rds("data_out/stocks_clean.rds")
index <- read_rds("data_out/predictive_SP500.rds")

# add indexes to the stocks data:
stocks <- stocks %>%
  left_join(index)

length(unique(stocks$gvkey)) 
# 502 firms of which:
length(unique(stocks$gvkey[stocks$FCPA_sample == 1])) 
# 262 firms with an FCPA history
length(unique(stocks$gvkey[stocks$FCPA_sample == 0])) 
# 240 other securities for firms that have no FCPA history (and that are not S&P500 constituents)

# check that we have no duplicates per ticker-date:
stocks %>%
  group_by(ticker_symbol, date) %>%
  filter(n() > 1)
# alright

# Estimation ----
tickers <- unique(stocks$ticker_symbol)

# we estimate all possible combinations over window lengths of 1, 3, 6 months (30, 90, 180 biz days) before the event
# and of 3, 5, 10, 15 LASSO CV folds (total: 12 estimations). Next, we estimate OLS models (with S&P 500 index, same windows)

# the estimation function is coded in the aux/estimation_window.R file so that we can more easily apply it here,
# by just tweaking the parameters we wanna change (window lengths, LASSO folds, LASSO vs OLS, etc.)

est_end <- -5L
eve_end <- +13L

## Estimation window: [-30,-5], 3 folds ----
tic()
win30_3folds <- map(.x = tickers,
                    .f = estimation_window,
                    data = stocks,
                    calendar = business_calendar,
                    event_date = as.Date("2025-02-10"),
                    estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                    seed = 160493, n_folds = 3, LASSO_first_col = "AAPL",
                    LASSO_rmall_NAs = TRUE,
                    .progress = TRUE) 

## Estimation window: [-90,-5], 3 folds ----
win90_3folds <- map(.x = tickers,
                    .f = estimation_window,
                    data = stocks,
                    calendar = business_calendar,
                    event_date = as.Date("2025-02-10"),
                    estim_beg = -90L, estim_end = est_end, event_end = eve_end,
                    seed = 160493, n_folds = 3, LASSO_first_col = "AAPL",
                    LASSO_rmall_NAs = TRUE,
                    .progress = TRUE) 

## Estimation window: [-180,-5], 3 folds ----
win180_3folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -180L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 3, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = FALSE,
                     .progress = TRUE) 

## Estimation window: [-30,-5], 5 folds ----
win30_5folds <- map(.x = tickers,
                    .f = estimation_window,
                    data = stocks,
                    calendar = business_calendar,
                    event_date = as.Date("2025-02-10"),
                    estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                    seed = 160493, n_folds = 5, LASSO_first_col = "AAPL",
                    LASSO_rmall_NAs = TRUE,
                    .progress = TRUE) 

## Estimation window: [-90,-5], 5 folds ----
win90_5folds <- map(.x = tickers,
                    .f = estimation_window,
                    data = stocks,
                    calendar = business_calendar,
                    event_date = as.Date("2025-02-10"),
                    estim_beg = -90L, estim_end = est_end, event_end = eve_end,
                    seed = 160493, n_folds = 5, LASSO_first_col = "AAPL",
                    LASSO_rmall_NAs = TRUE,
                    .progress = TRUE) 

## Estimation window: [-180,-5], 5 folds ----
win180_5folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -180L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 5, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = FALSE,
                     .progress = TRUE) 

## Estimation window: [-30,-5], 10 folds ----
win30_10folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 10, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = TRUE,
                     .progress = TRUE) 

## Estimation window: [-90,-5], 10 folds ----
win90_10folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -90L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 10, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = TRUE,
                     .progress = TRUE) 

## Estimation window: [-180,-5], 10 folds ----
win180_10folds <- map(.x = tickers,
                      .f = estimation_window,
                      data = stocks,
                      calendar = business_calendar,
                      event_date = as.Date("2025-02-10"),
                      estim_beg = -180L, estim_end = est_end, event_end = eve_end,
                      seed = 160493, n_folds = 10, LASSO_first_col = "AAPL",
                      LASSO_rmall_NAs = FALSE,
                      .progress = TRUE) 

## Estimation window: [-30,-5], 15 folds ----
win30_15folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 15, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = TRUE,
                     .progress = TRUE) 

## Estimation window: [-90,-5], 15 folds ----
win90_15folds <- map(.x = tickers,
                     .f = estimation_window,
                     data = stocks,
                     calendar = business_calendar,
                     event_date = as.Date("2025-02-10"),
                     estim_beg = -90L, estim_end = est_end, event_end = eve_end,
                     seed = 160493, n_folds = 15, LASSO_first_col = "AAPL",
                     LASSO_rmall_NAs = TRUE,
                     .progress = TRUE) 

## Estimation window: [-180,-5], 15 folds ----
win180_15folds <- map(.x = tickers,
                      .f = estimation_window,
                      data = stocks,
                      calendar = business_calendar,
                      event_date = as.Date("2025-02-10"),
                      estim_beg = -180L, estim_end = est_end, event_end = eve_end,
                      seed = 160493, n_folds = 15, LASSO_first_col = "AAPL",
                      LASSO_rmall_NAs = FALSE,
                      .progress = TRUE)

## Estimation window: [-30,-5], OLS ----
win30_ols <- map(.x = tickers,
                 .f = estimation_window,
                 data = stocks,
                 calendar = business_calendar,
                 event_date = as.Date("2025-02-10"),
                 estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                 estim_LASSO = FALSE, LASSO_first_col = "AAPL",
                 .progress = TRUE) 

## Estimation window: [-90,-5], OLS ----
win90_ols <- map(.x = tickers,
                 .f = estimation_window,
                 data = stocks,
                 calendar = business_calendar,
                 event_date = as.Date("2025-02-10"),
                 estim_beg = -90L, estim_end = est_end, event_end = eve_end,
                 estim_LASSO = FALSE, LASSO_first_col = "AAPL",
                 .progress = TRUE) 

## Estimation window: [-180,-5], OLS ----
win180_ols <- map(.x = tickers,
                  .f = estimation_window,
                  data = stocks,
                  calendar = business_calendar,
                  event_date = as.Date("2025-02-10"),
                  estim_beg = -180L, estim_end = est_end, event_end = eve_end,
                  estim_LASSO = FALSE, LASSO_first_col = "AAPL",
                  .progress = TRUE) 

## Export estimation results ----
out <- list("win30_3folds" = win30_3folds, "win30_5folds" = win30_5folds, 
            "win30_10folds" = win30_10folds, "win30_15folds" = win30_15folds,
            "win30_ols" = win30_ols,
            
            "win90_3folds" = win90_3folds, "win90_5folds" = win90_5folds, 
            "win90_10folds" = win90_10folds, "win90_15folds" = win90_15folds,
            "win90_ols" = win90_ols,
            
            "win180_3folds" = win180_3folds, "win180_5folds" = win180_5folds, 
            "win180_10folds" = win180_10folds, "win180_15folds" = win180_15folds,
            "win180_ols" = win180_ols)

write_rds(out, "estim/estimation_windows.rds")
toc()
# it takes about 13 min to run until here on my machine

# Adopt best fitting model ----
# we take:
# - as preferred estimation the one with 30 days, 15 folds
# - as robustness for the length, those with 180 days, 15 folds and 90 days, 15 folds
# - as robustness for the folds, those with 30 days and 10, 5, 3 folds
# - as robustness for LASSO vs OLS, those with 180 days of estimation window

## Add fitted and abnormal returns ----
# Save estimates from the window with 30 days and 15 folds:
estim <- map(.x = 1:length(tickers),
             .f = ~win30_15folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg" = "fitted_LASSO",
         "abn_chg" = "abnormal_LASSO")

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(colnames(estim))

# add R2, too
fits <- map(.x = 1:length(tickers),
            .f = ~win30_15folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2, .after = abn_chg)

## Add other estimation results for robustness tests ----
# Add 180 days with 15 folds and 90 days with 15 folds
estim <- map(.x = 1:length(tickers),
             .f = ~win180_15folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_180d15f" = "fitted_LASSO",
         "abn_chg_180d15f" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("180d15f$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win180_15folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_180d15f" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_180d15f, .after = abn_chg_180d15f)

estim <- map(.x = 1:length(tickers),
             .f = ~win90_15folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_90d15f" = "fitted_LASSO",
         "abn_chg_90d15f" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("90d15f$"), .after = abn_chg_180d15f)

fits <- map(.x = 1:length(tickers),
            .f = ~win90_15folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_90d15f" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_90d15f, .after = abn_chg_90d15f)

estim <- map(.x = 1:length(tickers),
             .f = ~win30_10folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_30d10f" = "fitted_LASSO",
         "abn_chg_30d10f" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("30d10f$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win30_10folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_30d10f" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_30d10f, .after = abn_chg_30d10f)

estim <- map(.x = 1:length(tickers),
             .f = ~win30_5folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_30d5f" = "fitted_LASSO",
         "abn_chg_30d5f" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("30d5f$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win30_5folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_30d5f" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_30d5f, .after = abn_chg_30d5f)

estim <- map(.x = 1:length(tickers),
             .f = ~win30_3folds[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_30d3f" = "fitted_LASSO",
         "abn_chg_30d3f" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("30d3f$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win30_3folds[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_30d3f" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_30d3f, .after = abn_chg_30d3f)

estim <- map(.x = 1:length(tickers),
             .f = ~win180_ols[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_180dols" = "fitted_OLS",
         "abn_chg_180dols" = "abnormal_OLS") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("180dols$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win180_ols[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_180dols" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_180dols, .after = abn_chg_180dols)

## Add also one set of results with LASSO, [-30, -1], 15 CV folds for robustness ----
win_var <- map(.x = tickers,
               .f = estimation_window,
               data = stocks,
               calendar = business_calendar,
               event_date = as.Date("2025-02-10"),
               estim_beg = -30L, estim_end = -1L, event_end = eve_end,
               seed = 160493, n_folds = 15,
               LASSO_rmall_NAs = TRUE, LASSO_first_col = "AAPL",
               .progress = TRUE) 

estim <- map(.x = 1:length(tickers),
             .f = ~win_var[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg_var" = "fitted_LASSO",
         "abn_chg_var" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks <- stocks %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("var$"), .after = abn_chg)

fits <- map(.x = 1:length(tickers),
            .f = ~win_var[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2_var" = "R2")

stocks <- stocks %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2_var, .after = abn_chg_var)

# Compute cumulative abnormal returns ----
stocks <- stocks %>%
  mutate(event = as.Date("2025-02-10")) %>%
  left_join(stocks %>%
              mutate(event = as.Date("2025-02-10")) %>%
              # we compute CARs from the event day onward:
              filter(date >= event) %>%
              arrange(ticker_symbol, date) %>%
              group_by(ticker_symbol) %>%
              select(date, ticker_symbol, 
                     abn_chg, abn_chg_30d10f, abn_chg_30d5f, abn_chg_30d3f, abn_chg_90d15f, abn_chg_180d15f, abn_chg_var,
                     abn_chg_180dols) %>%
              reframe(date = unique(date),
                      car = cumsum(coalesce(abn_chg, 0)),
                      car_30d10f = cumsum(coalesce(abn_chg_30d10f, 0)),
                      car_30d5f = cumsum(coalesce(abn_chg_30d5f, 0)), 
                      car_30d3f = cumsum(coalesce(abn_chg_30d3f, 0)),
                      car_90d15f = cumsum(coalesce(abn_chg_90d15f, 0)),
                      car_180d15f = cumsum(coalesce(abn_chg_180d15f, 0)),
                      car_180dols = cumsum(coalesce(abn_chg_180dols, 0)),
                      car_var = cumsum(coalesce(abn_chg_var, 0))),
            by = c("ticker_symbol", "date"))

# Reorder columns ----
stocks <- stocks %>%
  select(-estimwindow, -eventwindow, -prchd, -prcld, -prcod) %>%
  rename("obs_chg" = "chg") %>%
  relocate(ticker_symbol, date, conm, gvkey, cusip, exchg, exchgn, FCPA_sample, matched, is_SP500, naics2, # IDs 
           # FCPA variables:
           enforcement_action, sanction_sum, sanction_avg, ongoing, latest_year, earliest_year, no_year, ticker_change, tot_actions,
           n_subsid_nonUS, n_subsid_US, n_subsid, n_unique_iso2, atq, avg_dec, # covariates
           # event variables and stocks data:
           event, prccd, obs_chg, # observed
           cshoq, # outstanding shares
           fit_chg, abn_chg, fit_R2, car, # default DVs
           fit_chg_90d15f, abn_chg_90d15f, fit_R2_90d15f, car_90d15f, # DVs with longer windows (90d)
           fit_chg_180d15f, abn_chg_180d15f, fit_R2_180d15f, car_180d15f, # DVs with longer windows (180d)
           fit_chg_30d10f, abn_chg_30d10f, fit_R2_30d10f, car_30d10f, # DVs with fewer folds (10f)
           fit_chg_30d5f, abn_chg_30d5f, fit_R2_30d5f, car_30d5f, # DVs with fewer folds (5f)
           fit_chg_30d3f, abn_chg_30d3f, fit_R2_30d3f, car_30d3f, # DVs with fewer folds (3f)
           fit_chg_var, abn_chg_var, fit_R2_var, car_var, # variation of LASSO
           fit_chg_180dols, abn_chg_180dols, fit_R2_180dols, car_180dols # 180 days OLS
  )

# Remove predictive indexes ----
options(max.print = 50000)
colnames(stocks)
# from "AAPL" onward, it's all predictive indices

drop <- stocks %>%
  select("AAPL":ncol(.)) %>%
  colnames()
drop # ok, all predictive indexes

stocks <- stocks %>%
  select(-all_of(drop))

colnames(stocks) # perfect

# Drop firms that have missing abn_chg information on the day of the event ----
# these are firms for whom the estimation window failed, due to sparse trading data around the event
keep <- stocks %>%
  filter(date == as.Date("2025-02-10")) %>%
  filter(!is.na(abn_chg)) %>%
  select(ticker_symbol) %>%
  pull() %>%
  unique()

stocks <- stocks %>%
  filter(ticker_symbol %in% keep)

# Export ----
write_rds(stocks, "data_out/stocks_analysis.rds")

# Replicate the procedure on alternative matching datasets ----
rm(list = setdiff(ls(), c("estimation_window", "business_calendar", "est_end", "eve_end", "index")))

stocks_cem <- read_rds("data_out/stocks_clean_CEM.rds") %>%
  # add indexes to the stocks data:
  left_join(index)

stocks_ent <- read_rds("data_out/stocks_clean_entropy.rds") %>%
  # add indexes to the stocks data:
  left_join(index)

## Coarsened exact matching, estimation window ----
tickers <- unique(stocks_cem$ticker_symbol)

# we keep estimation default values as it's just for a robustness test:
window_cem <- map(.x = tickers,
                  .f = estimation_window,
                  data = stocks_cem,
                  calendar = business_calendar,
                  event_date = as.Date("2025-02-10"),
                  estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                  seed = 160493, n_folds = 15, LASSO_first_col = "AAPL",
                  LASSO_rmall_NAs = TRUE,
                  .progress = TRUE) 
# it takes about 1m

estim <- map(.x = 1:length(tickers),
             .f = ~window_cem[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg" = "fitted_LASSO",
         "abn_chg" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks_cem <- stocks_cem %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("\\_chg$"), .after = chg)

fits <- map(.x = 1:length(tickers),
            .f = ~window_cem[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2" = "R2")

stocks_cem <- stocks_cem %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2, .after = abn_chg)

# compute CARs:
stocks_cem <- stocks_cem %>%
  mutate(event = as.Date("2025-02-10")) %>%
  left_join(stocks_cem %>%
              mutate(event = as.Date("2025-02-10")) %>%
              filter(date >= event) %>%
              arrange(ticker_symbol, date) %>%
              group_by(ticker_symbol) %>%
              select(date, ticker_symbol, event, abn_chg) %>%
              reframe(date = unique(date),
                      car = cumsum(coalesce(abn_chg, 0))),
            by = c("ticker_symbol", "date"))

# reorder columns 
stocks_cem <- stocks_cem %>%
  select(-prchd, -prcld, -prcod) %>%
  rename("obs_chg" = "chg") %>%
  relocate(ticker_symbol, date, conm, gvkey, cusip, exchg, exchgn, FCPA_sample, matched, is_SP500, naics2, # IDs 
           # FCPA variables:
           enforcement_action, sanction_sum, sanction_avg, ongoing, latest_year, earliest_year, no_year, ticker_change, tot_actions, 
           n_subsid_nonUS, n_subsid_US, n_subsid, n_unique_iso2, atq, avg_dec, # covariates
           # event variables and stocks data:
           event, prccd, obs_chg, # observed
           cshoq, # outstanding shares
           fit_chg, abn_chg, fit_R2, car # default DVs
  )

# remove predictive indexes 
options(max.print = 50000)
colnames(stocks_cem)
# from "AAPL" onward, it's all predictive indices

drop <- stocks_cem %>%
  select("AAPL":ncol(.)) %>%
  colnames()
drop # ok, all predictive indexes

stocks_cem <- stocks_cem %>%
  select(-all_of(drop))

colnames(stocks_cem) # perfect

### Export ----
write_rds(stocks_cem, "data_out/stocks_analysis_CEM.rds")

## Entropy balancing, estimation window ----
tickers <- unique(stocks_ent$ticker_symbol)
window_ent <- map(.x = tickers,
                  .f = estimation_window,
                  data = stocks_ent,
                  calendar = business_calendar,
                  event_date = as.Date("2025-02-10"),
                  estim_beg = -30L, estim_end = est_end, event_end = eve_end,
                  seed = 160493, n_folds = 15, LASSO_first_col = "AAPL",
                  LASSO_rmall_NAs = TRUE,
                  .progress = TRUE) 
# this one takes 8m (we're estimating 3,670 lasso models...)

estim <- map(.x = 1:length(tickers),
             .f = ~window_ent[[.x]]$counterfactuals) %>%
  bind_rows() %>%
  select(-chg) %>%
  rename("fit_chg" = "fitted_LASSO",
         "abn_chg" = "abnormal_LASSO") %>%
  select(-estimwindow, -eventwindow)

stocks_ent <- stocks_ent %>%
  left_join(estim, 
            by = c("date", "ticker_symbol")) %>%
  relocate(matches("\\_chg$"), .after = chg)

fits <- map(.x = 1:length(tickers),
            .f = ~window_ent[[.x]]$fit) %>%
  bind_rows() %>%
  rename("ticker_symbol" = "ticker",
         "fit_R2" = "R2")

stocks_ent <- stocks_ent %>%
  left_join(fits, by = "ticker_symbol") %>%
  relocate(fit_R2, .after = abn_chg)

# compute CARs:
stocks_ent <- stocks_ent %>%
  mutate(event = as.Date("2025-02-10")) %>%
  left_join(stocks_ent %>%
              mutate(event = as.Date("2025-02-10")) %>%
              filter(date >= event) %>%
              arrange(ticker_symbol, date) %>%
              group_by(ticker_symbol) %>%
              select(date, ticker_symbol, event, abn_chg) %>%
              reframe(date = unique(date),
                      car = cumsum(coalesce(abn_chg, 0))),
            by = c("ticker_symbol", "date"))

# reorder columns 
stocks_ent <- stocks_ent %>%
  select(-prchd, -prcld, -prcod) %>%
  rename("obs_chg" = "chg") %>%
  relocate(ticker_symbol, date, conm, gvkey, cusip, exchg, exchgn, FCPA_sample, matched, is_SP500, naics2, # IDs 
           # FCPA variables:
           enforcement_action, sanction_sum, sanction_avg, ongoing, latest_year, earliest_year, no_year, ticker_change, tot_actions,
           n_subsid_nonUS, n_subsid_US, n_subsid, n_unique_iso2, atq, avg_dec, # covariates
           # event variables and stocks data:
           event, prccd, obs_chg, # observed
           cshoq, # outstanding shares
           fit_chg, abn_chg, fit_R2, car # default DVs
  )

# remove predictive indexes 
options(max.print = 50000)
colnames(stocks_ent)
# from "AAPL" onward, it's all predictive indices

drop <- stocks_ent %>%
  select("AAPL":ncol(.)) %>%
  colnames()
drop # ok, all predictive indexes

stocks_ent <- stocks_ent %>%
  select(-all_of(drop))

colnames(stocks_ent) # perfect

### Export ----
write_rds(stocks_ent, "data_out/stocks_analysis_entropy.rds")

#====# The End #====#